---
title: "Data Log"
author: "Tommy Flynn"
toc: true
number-sections: true
format:
html:
code-tools: true
code-fold: false
code-copy: true
code-summary: "code"
code-block-bg: true
code-block-border-left: "#31BAE9"
---
# Setup
```{r setup, message=FALSE}
#| code-fold: true
knitr:: opts_chunk$ set (error = FALSE ,
tidy = TRUE ,
# tidy.opts = list(width.cutoff = 60),
message = FALSE ,
warning = FALSE )
library (tidyverse)
library (readxl)
library (readr)
library (haven)
library (knitr)
library (kableExtra)
library (lubridate)
library (labelled)
library (gtsummary)
```
```{r directories}
#| code-fold: true
# Working directory saved as `wd` and data path as `data_path`
wd <- getwd ()
data_path <- paste (wd, "Data" , sep = "/" )
```
# Identification numbers
## `id_sid_matchup`
```{r id_sid_matchup}
# Import & clean id_sid_matchup2.sas7bdat
id_sid_matchup2 <- read_sas (paste (data_path, "ids" , "id_sid_matchup2.sas7bdat" , sep = "/" ))
id_sid_clean <- id_sid_matchup2 %>%
transmute (sid,
newsid = as.character (newsid),
shift_num = str_extract (shift_num_ampm, "[:digit:]+" ),
date = d8,
am_pm = str_extract (shift_num_ampm, "am|pm" ),
shift_start_dttm = startd8time,
shift_start_tm = ShiftStart,
shift_end_dttm = ymd_hms (paste (date, ShiftEnd)),
shift_end_tm = ShiftEnd,
staff = as_factor (staff))
# preview id_sid_match_cleaned
rmarkdown:: paged_table (id_sid_clean)
# Save id_sid_clean to files in csv and Rdata format
# write_csv(id_sid_clean, path = paste(data_path, "Rdata", "id_sid_clean.Rdata", sep = "/"))
# write_csv(id_sid_clean, path = paste(data_path, "csv_files", "id_sid_clean.csv", sep = "/"))
```
## `id_final`
```{r ids_final}
ids_final <- read_sas (paste (data_path, "ids" , "finalsids6944.sas7bdat" , sep = "/" ))
# preview ids_final (no cleaning needed)
rmarkdown:: paged_table (ids_final)
# Check if sids repeate within shifts:
ids_final %>%
group_by (shift_num) %>%
count (sid) %>%
filter (n > 1 ) %>%
rmarkdown:: paged_table ()
```
## `pt_sids`
```{r pt_sids}
pt_sids <- read_sas (paste (data_path, "ids" , "patientSIDs_in_completepat.sas7bdat" , sep = "/" ))
# preview unclean pt_sids
# glimpse(pt_sids)
pt_sid_clean <- pt_sids %>%
transmute (sid,
shift_num = str_extract (shift_num_ampm, "[:digit:]+" ),
shift_ampm = as_factor (str_extract (shift_num_ampm, "am|pm" )))
# preview pt_sid_clean
rmarkdown:: paged_table (pt_sid_clean)
pt_sid_clean %>%
group_by (shift_num) %>%
count (shift_ampm) %>%
arrange (shift_num) %>%
rmarkdown:: paged_table ()
pt_sid_clean %>%
count (shift_ampm) %>%
rmarkdown:: paged_table ()
# Save to file
# write_rds(pt_sid_clean, path = paste(data_path, "Rdata", "pt_sid_clean.Rdata", sep = "/"))
# write_csv(pt_sid_clean, path = paste(data_path, "csv_files", "pt_sid_clean.csv", sep = "/"))
# generate_dictionary(pt_sid_clean)
```
##`staff_sids`
```{r staff_sids}
staff_sids <- read_sas (paste (data_path, "ids" , "staffsids_in_eventfile.sas7bdat" , sep = "/" ))
str (staff_sids)
staff_sids_clean <- staff_sids %>%
mutate (across (.cols = ! sid, .fns = as.factor)) %>%
select (- job)
rmarkdown:: paged_table (staff_sids_clean)
staff_sids_clean %>%
count (ParticipantCat4) %>%
rmarkdown:: paged_table ()
# Save to file
# write_csv(staff_sids_clean, file = paste(data_path, "Rdata", "staff_sids_clean.Rdata", sep = "/"))
# write_csv(staff_sids_clean, file = paste(data_path, "csv_files", "staff_sids_clean.csv", sep = "/"))
# generate_dictionary(staff_sids_clean)
```
## `allstaff`
```{r allstaff_clean}
allstaff <- read_sas (paste (data_path, "staff" , "allstaff_numbyshift.sas7bdat" , sep = "/" ))
# str(allstaff)
allstaff_clean <- allstaff %>%
mutate (job_title = as_factor (JOBTITLE),
.keep = "unused" )
# write_rds(allstaff_clean, paste(data_path, "Rdata", "allstaff_clean.Rdata", sep = "/"))
# write_csv(allstaff_clean, paste(data_path, "csv_files", "allstaff_clean.csv", sep = "/"))
rmarkdown:: paged_table (allstaff_clean)
# generate_dictionary(allstaff_clean)
```
## `ntab2`
```{r ntab}
ntab2 <- read_csv (file = paste (data_path, "ids" , "nTab2.csv" , sep = "/" ))
# glimpse(ntab2)
ntab2_clean <- ntab2 %>%
transmute (observation = obs,
sid = sids,
id = as.character (ID),
date = dmy (D8),
shift_num = shiftnum,
shift_person = person,
staff = as_factor (staff),
degree,
wdegree,
WtAlpha)
rmarkdown:: paged_table (ntab2_clean)
# save to file
# write_rds(ntab2_clean, file = paste(data_path, "Rdata", "ntab2_clean.Rdata", sep = "/"))
# write_csv(ntab2_clean, file = paste(data_path, "csv_files", "ntab2_clean.csv", sep = "/"))
# generate_dictionary(ntab2_clean)
```
ntab2 lists `r max(ntab2_clean$observation)` observations inclusive of all shifts. These observations list up to `r max(ntab2_clean$person)` individuals per shift, all 81 shifts.
# Shift Info
## `shift_times`
```{r shift_times}
shift_times <- read_xls (paste (data_path, "shifts" , "Shift Start End times v3.xls" , sep = "/" ),
col_names = c ("date" ,
"shift_start_tm" ,
"shift_end_tm" ,
"shift_ampm" ,
"comments" ),
col_types = c ("date" ,
"guess" ,
"guess" ,
"text" ,
"text" ,
"skip" ,
"skip" ,
"skip" ,
"skip" ,
"skip" ,
"skip" ),
skip = 1 )
# shift_times %>%
# str()
rmarkdown:: paged_table (shift_times)
# generate_dictionary(shift_times)
# write_csv(shift_times, file = paste(data_path, "csv_files", "shift_times.csv", sep = "/"))
# write_rds(shift_times, file = paste(data_path, "Rdata", "shift_times.Rdata", sep = "/"))
```
## `counts_per_shift`
```{r counts_per_shift}
counts_per_shift <- read_sas (paste (data_path, "shifts" , "countspershift.sas7bdat" , sep = "/" ))
# str(counts_per_shift)
countspershift_clean <- counts_per_shift %>%
transmute (shift_num,
date = d8,
am_pm = str_extract (shift_num_ampm, "am|pm" ),
staffcount = as.integer (staffcount),
patcount = as.integer (patcount),
total = as.integer (total))
rmarkdown:: paged_table (countspershift_clean)
# write_rds(countspershift_clean, file = paste(data_path, "Rdata", "countspershift_clean.Rdata", sep = "/"))
# write_csv(countspershift_clean, file = paste(data_path, "csv_files", "countspershift_clean.csv", sep = "/"))
```
## `person_dataset`
```{r person_dataset}
person_dataset <- read_sas (paste (data_path, "patients" , "person_dataset.sas7bdat" , sep = "/" ))
person_dataset <- person_dataset %>%
transmute (shift_num = as.integer (shiftnum),
person = as.character (person),
staff = as_factor (staff),
degree,
minutes)
rmarkdown:: paged_table (person_dataset)
# generate_dictionary(person_dataset)
# write_rds(person_dataset, paste(data_path, "Rdata", "person_dataset.Rdata", sep = "/"))
# write_csv(person_dataset, paste(data_path, "csv_files", "person_dataset.csv", sep = "/"))
```
# Room Info
### `location_categories`
```{r location_cats}
location_cats <- read_sas (paste (data_path, "rooms" , "location_categories_list.sas7bdat" , sep = "/" ))
rmarkdown:: paged_table (location_cats)
```
### `location_defs`
```{r location_defs}
location_defs <- read_sas (paste (data_path, "rooms" , "location_definitions.sas7bdat" , sep = "/" ))
rmarkdown:: paged_table (location_defs)
# generate_dictionary(location_defs)
```
### `room_locs`
```{r room_locs}
room_locs <- read_xlsx (paste (data_path,
"rooms" ,
"Room Locations and Square Footage - with Correction.xlsx" ,
sep = "/" ),
col_names = c ("id" ,
"name" ,
"type" ,
"sqft_area" ),
col_types = c ("text" ,
"text" ,
"text" ,
"numeric" ,
"skip" ),
skip = 1 )
rmarkdown:: paged_table (room_locs)
```
### `locations_list`
```{r locations_list}
locations_list <- read_sas (paste (data_path,
"rooms" ,
"locationslist.sas7bdat" ,
sep = "/" ))
rmarkdown:: paged_table (locations_list)
```
# Patient Data
## Patient Demographics
Demographic information with med_rec numbers and encounter numbers. Note that **med_rec numbers may repeat!**
### `demographics_df`
```{r demographics_df}
demographic_df <- read_csv (paste (data_path, "patients" , "Population_Patient_Demographics.csv" , sep = "/" ))
# glimpse(demographic_df)
demographics_clean <- demographic_df %>%
transmute (med_rec_no = as.character (` Medical Record Number ` ),
encounter_no = as.character (` Enc . ` ),
arrival_date = mdy (` ED Arrival Timestamp ` ),
departure_date = mdy (` ED Departure Timestamp ` ),
# across(!where(is.Date)
# & !where(is.double)
# & !where(is.integer)
# & !med_rec_no
# & !encounter_no
# & !`ED Arrival Timestamp`
# & !`ED Departure Timestamp`
# & !`Chief Complaint`
# & !`% ED Patients Admitted as Inpt or Obs`,
# .fns = as_factor),
sex = as_factor (Sex, ),
age = ` Patient Age at Visit ` ,
race = as_factor (Race),
chief_complaint = ` Chief Complaint ` ,
acuity = as_factor (Acuity),
arrival_mode = as_factor (` Arr Mode ` ),
disposition = as_factor (` ED Disposition ` )) %>%
# str()
# demographics_clean_test %>%
mutate (across (where (is.factor), ~ recode (.x, ` Not Recorded ` = NA_character_ , ` Unknown ` = NA_character_ ))) %>%
mutate (acuity = fct_relevel (acuity,
"1 Immediate" ,
"2 Emergent" ,
"3 Urgent" ,
"4 Stable" ,
"5 Non Urgent" ),
arrival_cat = fct_recode (arrival_mode,
"EMS" = "EMS Ground" ,
"EMS" = "EMS Air" ,
"Custody" = "Police/Jail" ,
"Ambulatory" = "Private Vehicle" ,
"Ambulatory" = "Public Trans" ,
"Ambulatory" = "Walk" ),
race = fct_relevel (race,
"Black" ,
"White" ,
"Hispanic" ,
"Asian" ,
"Other" ,
"1" ,
"2" ,
"3" ), .after = 10 )
# generate_dictionary(demographics_clean_test)
# generate_dictionary(demographics_clean)
rmarkdown:: paged_table (demographics_clean)
demographics_clean %>%
count (race) %>%
rmarkdown:: paged_table ()
demographics_clean %>%
group_by (acuity) %>%
summarise (` Total (n) ` = n (),
` Black (n) ` = sum (race == "Black" , na.rm = TRUE ),
` Black (%) ` = round ((` Black (n) ` / ` Total (n) ` )* 100 , 2 ),
` White (n) ` = sum (race == "White" , na.rm = TRUE ),
` White (%) ` = round ((` White (n) ` / ` Total (n) ` )* 100 , 2 ),
` Hispanic (n) ` = sum (race == "Hispanic" , na.rm = TRUE ),
` Hispanic (%) ` = round ((` Hispanic (n) ` / ` Total (n) ` ) * 100 , 2 ),
` Other (n) ` = sum (race == "Other" , na.rm = TRUE ),
` Other (%) ` = round ((` Other (n) ` / ` Total (n) ` ) * 100 , 2 ),
` Missing (n) ` = sum (is.na (race)),
` Missing (%) ` = round ((` Missing (n) ` / ` Total (n) ` ) * 100 , 2 )) %>%
rename (Acuity = acuity) %>%
rmarkdown:: paged_table ()
# write_rds(demographics_clean, paste(data_path, "Rdata", "demographics_clean.Rdata", sep = "/"))
# write_csv(demographics_clean, paste(data_path, "csv_files", "demographics_clean.csv", sep = "/"))
```
### `pop_pt_demographics`
```{r pop_pt_demographics}
pop_pt_demographics <- read_csv (paste (data_path, "patients" , "Population_Patient_Demographics.csv" , sep = "/" ))
nms <- c ("arrival_date" ,
"med_rec_no" ,
"encounter_no" ,
"sex" ,
"age" ,
"race" ,
"chief_complaint" ,
"acuity" ,
"arrival_mode" ,
"departure_date" ,
"disposition" ,
"prop_admitted" )
names (pop_pt_demographics) <- nms
# str(pop_pt_demographics)
# generate_dictionary(pop_pt_demographics)
pop_pt_demo_clean <- pop_pt_demographics %>%
mutate (med_rec_no = as.character (med_rec_no),
encounter_no = as.character (encounter_no),
arrival_date = mdy (arrival_date), .after = 3 ,
departure_date = mdy (departure_date),
sex = as_factor (sex),
race = as_factor (race),
acuity = as_factor (acuity),
arrival_mode = as_factor (arrival_mode)) %>%
mutate (across (where (is.factor), ~ recode (.x, "Not Recorded" = NA_character_ , "Unknown" = NA_character_ ))) %>%
mutate (acuity = fct_relevel (acuity,
"1 Immediate" ,
"2 Emergent" ,
"3 Urgent" ,
"4 Stable" ,
"5 Non Urgent" ),
arrival_cat = fct_recode (arrival_mode,
"EMS" = "EMS Ground" ,
"EMS" = "EMS Air" ,
"Custody" = "Police/Jail" ,
"Ambulatory" = "Private Vehicle" ,
"Ambulatory" = "Public Trans" ,
"Ambulatory" = "Walk" ),
.after = 9 ,
race = fct_relevel (race,
"Black" ,
"White" ,
"Hispanic" ,
"Asian" ,
"Other" ,
"1" ,
"2" ,
"3" ))
rmarkdown:: paged_table (pop_pt_demo_clean)
# generate_dictionary(pop_pt_demo_clean)
# write_csv(pop_pt_demo_clean, file = paste(data_path, "csv_files", "pop_pt_demo_clean.csv", sep = "/"))
# write_rds(pop_pt_demo_clean, file = paste(data_path, "Rdata", "pop_pt_demo_clean.Rdata", sep = "/"))
```
### `pts_population`
```{r pts_population}
pts_population <- read_sas (paste (data_path, "patients" , "Patients_population.sas7bdat" , sep = "/" ))
rmarkdown:: paged_table (pts_population)
# generate_dictionary(pts_population)
# write_csv(pts_population, file = paste(data_path, "csv_files", "pts_population.csv", sep = "/"))
# write_rds(pts_population, file = paste(data_path, "Rdata", "pts_population.Rdata", sep = "/"))
```
### `pts_pop2`
```{r pts_pop2}
pts_pop2 <- read_sas (paste (data_path, "patients" , "patients_population2.sas7bdat" , sep = "/" ))
rmarkdown:: paged_table (pts_pop2)
# generate_dictionary(pts_pop2)
# write_csv(pts_pop2, file = paste(data_path, "csv_files", "pts_pop2.csv", sep = "/"))
# write_rds(pts_pop2, file = paste(data_path, "Rdata", "pts_pop2.Rdata", sep = "/"))
```
"completepat.sas7bdat" contains RFID badge locations (by *room number*) for each sampled patient for every second they were monitored in the ED.
The file is large, so subset a random sample of observations with `slice_sample()` to test code for wrangling.
Use `pivot_longer()` to reshape from wide format to long format by collapsing all location-by-second columns (variables `flocX$` , where `X` is the shift time in seconds that the respective SID was identified in room number `flocX` ) into two columns, names set to `seconds` and values to `location` .
> Note that `pivot_longer()` will create *many duplicate SIDs and many* `NA` s which will require filtering.
## Patient RFID Data
### `pt_complete`
```{r pt_complete}
#| code-fold: true
pt_complete <- read_sas (paste (data_path, "patients" , "completepat.sas7bdat" , sep = "/" ))
# glimpse(pt_complete)
# 1. subset a random sample of 30 observations for data transformation code preparation
# set.seed(711)
# pt_complete_sample <- pt_complete %>% slice_sample(n = 30)
# pt_complete_sample %>%
pt_complete_long <- pt_complete %>%
# 2. Pivot the data.frame from wide to long by placing all column names that start with "floc" into a new column, "seconds," and placing respective observations for each "floc" variable into a "location_num" column
pivot_longer (cols = starts_with ("floc" ),
names_to = "shift_second" , # moves column names to new column "seconds"
values_to = "location_num" , # moves location numbers to new column "location_num"
values_drop_na = TRUE ) %>% # removes all rows (observations) without location data
# 3. Delete the prefix "floc" from `seconds` and change class to numeric
mutate (shift_second = as.double (str_replace (shift_second, "floc" , "" )),
location_num = as_factor (location_num),
shift_num_ampm = str_trim (shift_num_ampm),
am_pm = as_factor (str_extract (shift_num_ampm, "am|pm" )),
shift_num = as.integer (str_extract (shift_num_ampm, "[:digit:]+" )),
date = make_date (year = year, month = mon, day = day)) %>%
select (sid, date, shift_num, am_pm, location_num, shift_second) %>%
# count(am_pm)
mutate (day_night = as_factor (if_else (am_pm == "am" , "day" , "night" )),
.after = 4 ) # %>%
# glimpse()
# Save as .Rdata file
rmarkdown:: paged_table (pt_complete_long)
# write_rds(pt_complete_long, file = paste(data_path, "Rdata", "pt_complete_long.Rdata", sep = "/"))
# write_csv(pt_complete_long, file = paste(data_path, "csv_files", "pt_complete_long.csv", sep = "/"))
# generate_dictionary(pt_complete_long)
```
> After transforming & cleaning "completepat.sas7bdat", `pt_complete_long` is __`r ncol(pt_complete_long)`__ variables wide and __`r nrow(pt_complete_long)`__ rows long.
# Staff Data
## Staff RFID data
The SAS data file, "completestaff.sas7bdat," contains RFID badge location (by *room number*) for all staff each second of every shift.
### `staff_complete`
```{r staff_complete}
# 1. pull "completestaff.sas7bdat" from `sas_data_list` as `tibble()`
staff_complete <- read_sas (paste (data_path, "staff" , "completestaff.sas7bdat" , sep = "/" ))
# Subset a random sample of 30 observations
# set.seed(321)
# staff_complete_sample <- staff_complete %>% slice_sample(n = 30)
# 2. Use a random sample of staff_complete for data cleaning & transformation
staff_complete_long <- staff_complete %>%
# 2a. Pivot the data.frame from wide to long by placing all column names that start with "floc" into a new column, "seconds," and placing respective observations for each "floc" variable into a "location_num" column
pivot_longer (cols = starts_with ("floc" ),
names_to = "seconds" ,
values_to = "location_num" ,
values_drop_na = TRUE ) %>%
# 2b. Remove the prefix "floc" from `time_seconds` and keep the digits as `seconds`
mutate (seconds = as.double (str_replace (seconds, "floc" , "" )),
location_num = as_factor (location_num),
shift_num_ampm = str_trim (shift_num_ampm),
shift_num = as.integer (str_extract (shift_num_ampm, "[:digit:]+" )),
am_pm = as_factor (str_extract (shift_num_ampm, "am|pm" )),
date = make_date (year = year, month = mon, day = day)) %>%
select (- d8:- mon)
# staff_complete_sample2
# write_rds(staff_complete_long, file = paste(data_path, "Rdata", "staff_complete_long.Rdata", sep = "/"))
# write_csv(staff_complete_long, file = paste(data_path, "csv_files", "staff_complete_long.csv", sep = "/"))
# 3. View data frame structure
# generate_dictionary(staff_complete_long)
rmarkdown:: paged_table (staff_complete_long)
```
# Network Data
## Whole Networks
### `net_all_xl`
```{r net_all_xl}
#| label: "net_all"
net_all_xl <- read_xlsx (paste (data_path, "networks" , "network_allshifts.xlsx" , sep = "/" ))
str (net_all_xl)
net_all_xl %>%
rmarkdown:: paged_table ()
```
### `net_all_sas`
```{r net_all_sas}
net_all_sas <- read_sas (paste (data_path, "networks" , "network_allshifts.sas7bdat" , sep = "/" ))
# str(net_all_sas)
net_all_sas %>%
rmarkdown:: paged_table ()
generate_dictionary (net_all_sas)
```
## Edges
### `edges_all`
```{r edges_all}
#| label: "edges_all"
edges_all <- read_sas (paste (data_path, "networks" , "allshifts_edges.sas7bdat" , sep = "/" ))
# str(edges_all)
edges_all %>%
rmarkdown:: paged_table ()
```
```{r edges2}
#| label: "edges2"
edges2 <- read_sas (paste (data_path, "networks" , "edges2.sas7bdat" , sep = "/" ))
rmarkdown:: paged_table (edges2)
```
## Events
### `events_long`
```{r events_long}
events_long <- read_sas (paste (data_path, "events" , "eventfile_long2.sas7bdat" , sep = "/" ))
rmarkdown:: paged_table (events_long)
# write_csv(events_long, file = paste(data_path, "csv_files", "events_long.csv", sep = "/"))
```
`events_long` has duplicates, use `eventfile_nodup` if needed.
### `events_unique`
```{r events_unique}
events_unique <- read_sas (paste (data_path, "events" , "events_uniquepersid2.sas7bdat" , sep = "/" ))
rmarkdown:: paged_table (events_unique)
```
### `eventfile_nodup`
```{r eventfile_nodup}
eventfile_nodup <- read_sas (paste (data_path, "events" , "eventfile_long6944_noduploc.sas7bdat" , sep = "/" ))
eventfile_nodup %>%
rmarkdown:: paged_table ()
# write_csv(eventfile_nodup, file = paste(data_path, "csv_files", "eventfile_nodup.csv", sep = "/"))
# write_rds(eventfile_nodup, file = paste(data_path, "Rdata", "eventfile_nodup.Rdata", sep = "/"))
```
# Data Dictionary
```{r dictionary}
dictionary <- lst (id_sid_clean = names (id_sid_clean),
ids_final = names (ids_final),
pt_sid_clean = names (pt_sid_clean),
person_dataset = names (person_dataset),
allstaff_clean = names (allstaff_clean),
staff_sids_clean = names (staff_sids_clean),
shift_times = names (shift_times),
ntab2_clean = names (ntab2_clean),
demographics_clean = names (demographics_clean),
pop_pt_demo_clean = names (pop_pt_demo_clean),
pts_pop2 = names (pts_pop2),
pt_complete_long = names (pt_complete_long),
staff_complete_long = names (staff_complete_long),
location_defs = names (location_defs),
net_all_xl = names (net_all_xl),
net_all_sas = names (net_all_sas),
edges_all = names (edges_all),
edges2 = names (edges2),
events_long = names (events_long),
events_unique = names (events_unique),
eventfile_nodup = names (eventfile_nodup))
dictionary_tbl <- unnest ((tibble (names = names (dictionary), var_list = dictionary)))
# if you make changes, rewrite to csv file:
# write_csv(dictionary_tbl, file = paste(data_path, "dictionaries", "dictionary_tbl.csv", sep = "/"))
rmarkdown:: paged_table (dictionary_tbl)
```
# SessionInfo
```{r SessionInfo}
sessioninfo:: session_info ()%>%
details:: details (
summary = 'Current session info' ,
open = FALSE
)
```